import pymysql
import requests
from lxml import html

# Module-level accumulators for the three scraped columns.
title = []
username = []
view = []

# Open the database connection.
print("正在连接MYSQL数据库")
db = pymysql.connect(
    host="localhost",
    user="root",
    password="123456",
    database="ydong",
)
# Every statement in this script is executed through this one cursor.
cursor = db.cursor()
print("数据库连接成功")

# Drop the table if it already exists so it can be recreated below.
cursor.execute("DROP TABLE IF EXISTS discuz")

# Table definition: all three columns are stored as varchar(50).
sql = """
          CREATE TABLE discuz (
          title varchar(50) NOT NULL,
          username varchar(50) DEFAULT NULL,
          view varchar(50) DEFAULT NULL) CHARSET=utf8;
      """
# Create the table.
cursor.execute(sql)

def insertDB(title, username, view):
    """Insert one row into the ``discuz`` table and commit it.

    Uses the module-level ``cursor`` and ``db`` objects. The values are
    passed to ``cursor.execute`` as query parameters rather than being
    formatted into the SQL text.

    Args:
        title: Value for the ``title`` column.
        username: Value for the ``username`` column.
        view: Value for the ``view`` column.
    """
    statement = """insert ignore into discuz(title,username,view) VALUES(%s,%s,%s)"""
    row = (title, username, view)
    cursor.execute(statement, row)
    # Commit immediately so each row is persisted as soon as it is written.
    db.commit()


# Request headers: send a desktop browser User-Agent with every request.
# Built once, because the value is identical for all pages.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400'}

# Fetch forum list pages 1..29, extract the columns and store them.
for page in range(1, 30):
    page_url = 'https://www.discuz.net/forum-developer-{}.html'.format(page)
    # The timeout keeps a single stalled connection from hanging the crawl.
    page_text = requests.get(url=page_url, headers=header, timeout=10).text
    tree = html.etree.HTML(page_text)

    # Extract the columns of THIS page only, evaluating each XPath once.
    page_titles = []
    locked_titles = tree.xpath("//th[@class='lock']/a[4]/text()")
    # Titles under th.lock are taken only when exactly one node matches.
    if len(locked_titles) == 1:
        page_titles.extend(locked_titles)
    page_titles.extend(tree.xpath("//th[@class='new']/a[3]/text()"))

    # Keep every other match (indices 0, 2, 4, ...) of the "by" cells.
    page_usernames = tree.xpath("//td[@class='by']/cite/a/text()")[::2]

    page_views = tree.xpath("//td[@class='num']/em/text()")

    # The module-level lists keep the running totals used for the progress
    # output; they are no longer the source of the database inserts.
    title.extend(page_titles)
    username.extend(page_usernames)
    view.extend(page_views)
    print(title, username, view)
    print(len(title), len(username), len(view))

    # Make a column-length mismatch visible instead of failing with an
    # IndexError part-way through the inserts.
    if not (len(page_titles) == len(page_usernames) == len(page_views)):
        print("warning: page {} column lengths differ: {} {} {}".format(
            page, len(page_titles), len(page_usernames), len(page_views)))

    # Write only the rows scraped from this page. Inserting the whole
    # accumulated lists after every page would store earlier pages again
    # and again (the table has no unique key for INSERT IGNORE to act on).
    # zip() stops at the shortest column.
    for row_title, row_username, row_view in zip(page_titles, page_usernames, page_views):
        insertDB(row_title, row_username, row_view)

# Release the cursor and then the database connection.
# NOTE: no commit happens here; insertDB() already commits after every
# insert, so the rows are committed before these close calls run.
print("关闭游标，提交，关闭数据库连接")
cursor.close()
db.close()
